* This do-file constructs the dataset necessary for Table2.
set more off

cd d:/data

* Step 1: base_final -- one row per (gyear, country, subcat) cell

use pat80_11_16c.dta, clear
keep gyear country subcat

* every input row contributes 1, so the collapsed sum is the row count of the cell
generate b_cst = 1
collapse (sum) b_cst, by(gyear country subcat)

* nf = 1/(1 - cell count / total count over all cells with the same gyear)
by gyear, sort: egen year_total = total(b_cst)
generate nf = 1/(1 - b_cst/year_total)

* the yearly total is only a helper for nf and is not stored
drop year_total

save base_final, replace

* Step 2: citation pairs carrying the attributes of the citing patent

use pat80_11_16c.dta, clear
keep patent gdate country subcat

* the rows loaded here are matched to the citing side of cite80_11.dta
rename patent citing
sort citing
merge 1:n citing using cite80_11.dta

* _merge==1 are patents that never cited any other patent
* _merge==2 are patents that were issued in some other period than 1980-2011
*   (check kept from the original author: count if _m==2&gyear>=1980&gyear<=2011)
keep if _merge==3
drop _merge

* number the citations of each cited patent in ascending order of the citing gdate
quietly bysort cited (gdate): generate cite_order = _n

* rename the citing patent's fields so they do not collide with the cited
* patent's gdate/country/subcat when the two sides are merged later
rename (gdate country subcat) (date_cited country1 subcat1)
drop citing

sort cited
save temp.dta, replace


* Step 3: attach each patent's own attributes to the citations it received

use pat80_11_16c.dta, clear
keep patent gdate gyear country subcat

* copy of patent used as the merge key against the cited side of temp.dta
generate cited = patent
sort cited
merge 1:n cited using temp.dta

* _merge==2: cited patent is absent from pat80_11_16c.dta
* (granted between 1976-1979 or withdrawn patent)
keep if _merge != 2
drop cited _merge

* y = gdate of the citing patent minus gdate of the cited patent;
* it is missing for patents that have no matched citation row
* NOTE(review): presumably measured in days, given the 730 / 1825 cut-offs used below -- confirm
generate y = date_cited - gdate


preserve

* keep citation rows with y up to 730; rows with missing y are removed too,
* because a missing value compares larger than any number
keep if y<=730

* sc / ss flag a citing patent from the same country / the same subcat;
* self==2 means both hold, and those rows are excluded
generate sc = (country == country1)
generate ss = (subcat == subcat1)
generate self = sc + ss
keep if self != 2

sort patent

keep if country != ""

save temp.dta, replace


* count the remaining citation rows per (gyear, country, subcat) cell

use temp, clear

keep if gyear < 2010

generate x = 1
collapse (sum) x, by(gyear country subcat)
rename x tcit

save tcit.dta, replace

* construct the datafile for panel regression

* -use- takes -clear- (discard the data in memory); it has no -replace- option
use base_final, clear

drop if country==""
drop if subcat==.
drop if gyear>=2010

* attach the citation totals: cells that exist only in tcit are discarded,
* and cells without any citation row get a total of zero
merge 1:1 gyear country subcat using tcit
drop if _merge==2
replace tcit=0 if tcit==.
drop _merge

* citation total divided by the cell count b_cst
gen y_avcit = tcit/b_cst

save panel_16c_final, replace

* Add country, subcategory, year, window and interaction dummies to the panel

use panel_16c_final, clear
order b_cst y_avcit country subcat gyear

* encode numbers the country strings in alphabetical order; the recode remaps
* those codes to the order given in ccode
* NOTE(review): the mapping assumes country takes exactly the 16 values listed in ccode -- confirm
encode country, gen(country_i)
recode country_i (1=11) (2=9) (3=10) (4=13) (5=3) (6=4) (7=5) (8=6) (9=12) (10=14) (11=2) (12=8) (13=15) (14=16) (15=7) (16=1)
label define ccode 1 "US" 2 "JP" 3 "DE" 4 "EU" 5 "FR" 6 "GB" 7 "TW" 8 "KR" 9 "CA" 10 "CH" 11 "AU" 12 "IL" 13 "CN" 14 "IN" 15 "RU" 16 "RW", replace
label values country_i ccode

* one dummy per country, with the automatic variable labels cleared
tab country_i, gen(c)
drop country_i
foreach v of varlist c1-c16 {
    label variable `v' ""
}

ren (c1 c2 c3 c4 c5 c6 c7 c8 c9 c10 c11 c12 c13 c14 c15 c16 ) (US JP DE EU FR GB TW KR CA CH AU IL CN IN RU RW)

* subcategory dummies s1-s37 and year dummies t1-t30
tab subcat, gen(s)
tab gyear, gen(t)

* rename the year dummies t1..t30 to t1980..t2009
forvalues i=1980(1)2009 {
    local x = `i'-1979
    ren t`x' t`i'
}

* rename the subcategory dummies from their position to their code; the highest
* positions go first so that a new name never collides with a dummy not yet renamed
ren (s16 s17 s18 s19 s20 s21 s22 s23 s24 s25 s26 s27 s28 s29 s30 s31 s32 s33 s34 s35 s36 s37)  (s41 s42 s43 s44 s45 s46 s49 s51 s52 s53 s54 s55 s59 s61 s62 s63 s64 s65 s66 s67 s68 s69)
ren (s11 s12 s13 s14 s15) (s25 s31 s32 s33 s39)
ren (s1 s2 s3 s4 s5 s6 s7 s8 s9 s10) (s11 s12 s13 s14 s15 s19 s21 s22 s23 s24)

* window dummies: jSTART_END = 1 when START < gyear <= END, for window lengths of 8 to 11 years
forvalues k=8(1)11 {
    forvalues i=1987(1)1993 {
        local temp = `i'+`k'
        gen j`i'_`temp' = (gyear>`i')&(gyear<=`temp')
    }
}

* post-period dummies: jYEAR = 1 when gyear > YEAR
forvalues i=1995(1)2004 {
    gen j`i' = (gyear>`i')
}

local countries US JP DE EU FR GB TW KR CA CH AU IL CN IN RU RW

* country x post-period interactions, e.g. US1995 = US*j1995
forvalues i=1995(1)2004 {
    foreach c of local countries {
        gen `c'`i' = `c'*j`i'
    }
}

* country x window interactions, e.g. US1987_1995 = US*j1987_1995
forvalues k=8(1)11 {
    forvalues i=1987(1)1993 {
        local temp = `i'+`k'
        foreach c of local countries {
            gen `c'`i'_`temp' = `c'*j`i'_`temp'
        }
    }
}

* string identifiers for the country-subcat, country-year and subcat-year pairs
gen CS=country+string(subcat)
gen CT=country+string(gyear)
gen ST=string(subcat)+string(gyear)

* cat = subcat with its last digit removed
gen cat=floor(subcat/10)
order cat, before(subcat)

* the stub c is free again because c1-c16 were renamed to the country codes above
tab cat, gen(c)

order tcit, last

sort gyear country subcat

save panel_16c_final, replace


restore


* keep citation rows with y up to 1825; rows with missing y are removed too,
* because a missing value compares larger than any number
keep if y<=1825

* sc / ss flag a citing patent from the same country / the same subcat;
* self==2 means both hold, and those rows are excluded
generate sc = (country == country1)
generate ss = (subcat == subcat1)
generate self = sc + ss
keep if self != 2

sort patent

keep if country != ""

save temp.dta, replace


* count the remaining citation rows per (gyear, country, subcat) cell

use temp, clear

keep if gyear < 2007

generate x = 1
collapse (sum) x, by(gyear country subcat)
rename x tcit

save tcit.dta, replace

* construct the datafile for panel regression

* -use- takes -clear- (discard the data in memory); it has no -replace- option.
* No interactive -edit- here, so the do-file can run unattended.
use base_final, clear

drop if country==""
drop if subcat==.
drop if gyear>=2007

* attach the citation totals: cells that exist only in tcit are discarded,
* and cells without any citation row get a total of zero
merge 1:1 gyear country subcat using tcit
drop if _merge==2
replace tcit=0 if tcit==.
drop _merge

* citation total divided by the cell count b_cst
gen y_avcit_5 = tcit/b_cst

* only the merge keys and the new measure are passed on
keep gyear country subcat y_avcit_5

save temp, replace

* add y_avcit_5 from temp.dta to the panel, then scale both measures by nf

use panel_16c_final, clear

merge 1:1 country subcat gyear using temp, nogenerate

rename (y_avcit y_avcit_5) (y_n y_n_5)

foreach v of varlist y_n y_n_5 {
    replace `v' = `v'*nf
}

order b_cst nf y_n y_n_5

save panel_16c_final, replace

* bring in y_avcit from panel_16c.dta under the name y
* NOTE(review): no keep() is given on these merges, so rows found only in the using file are retained -- confirm this is intended

use panel_16c, clear

keep y_avcit country subcat gyear
rename y_avcit y

save temp, replace

use panel_16c_final, clear

merge 1:1 country subcat gyear using temp, nogenerate

save panel_16c_final, replace

* bring in y_avcit from panel_16c_5yrs.dta under the name y_5

use panel_16c_5yrs, clear

keep y_avcit country subcat gyear
rename y_avcit y_5

save temp, replace

use panel_16c_final, clear

merge 1:1 country subcat gyear using temp, nogenerate

order b_cst nf y y_n y_5 y_n_5

save panel_16c_final, replace


